#!/usr/bin/python
#-*- coding: UTF-8

# VALUES_I holds the final values (weights); VALUES_C holds the original
# Unicode codepoints, which are used for collision detection.

import sys
import mph


def usage():
	"""Write the command-line synopsis and argument descriptions to stdout."""
	help_lines = (
		# sys.argv[0] is the name the script was invoked under.
		'usage: %s [TAG] [BMP_ONLY]' % sys.argv[0],
		'',
		'  [TAG]      - tag to use to prefix hashtables',
		'  [BMP_ONLY] - flag to indicate if set is BMP-only, 0 or 1 (false or true)',
	)
	# Every line, including the last one, is newline-terminated.
	sys.stdout.write('\n'.join(help_lines) + '\n')

def read_mappings(lines):
	"""Parse "codepoint replacement" lines into the table handed to mph.

	Each line is stripped and split on single spaces; empty fields produced
	by runs of spaces are dropped.  The first field (the codepoint) is kept
	exactly as the text read from the input, the second (the replacement) is
	parsed as a hexadecimal integer.

	Lines that are blank or that carry no replacement field are skipped.

	lines -- iterable of text lines (sys.stdin when run as a script)

	Returns a dict mapping codepoint -> (codepoint, replacement).  A
	codepoint that occurs more than once keeps its last replacement.

	Raises ValueError if a line has more than two fields or if its
	replacement is not valid hexadecimal.
	"""
	table = {}

	for lineno, line in enumerate(lines, 1):
		fields = [field for field in line.strip().split(' ') if field]

		# Nothing to map: blank line, or a codepoint without a replacement.
		# This has to be decided before unpacking, otherwise the unpacking
		# itself raises and the line can never be skipped.
		if len(fields) < 2:
			continue

		if len(fields) > 2:
			raise ValueError('line %d: expected 2 fields, got %d: %r'
				% (lineno, len(fields), line))

		codepoint, replacement = fields
		table[codepoint] = (codepoint, int(replacement, 16))

	return table


if __name__ == '__main__':
	# Both positional arguments are required.
	if len(sys.argv) < 3:
		usage()
		sys.exit(1)

	TAG = sys.argv[1]
	# BMP_ONLY is given as an integer; any non-zero value means true.
	bmp_only = bool(int(sys.argv[2]))

	(G, V) = mph.create_minimal_perfect_hash(read_mappings(sys.stdin))

	# Sanity check on the generator's result: both tables must have the
	# same number of entries before they are written out.
	assert len(G) == len(V)

	mph.gen_header(TAG, G, [])
	mph.gen_includes()
	mph.gen_G(TAG, G)
	mph.gen_values(TAG, G, V, bmp_only)
